In [1]:
from deep_translator import GoogleTranslator
from datetime import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.font_manager
import pandas as pd
import numpy as np
import folium as f
import openpyxl
import json
%matplotlib inline

output_path = 'output'
geojson_path = 'maps/world/world-administrative-boundaries.geojson'
save_path = f'{output_path}/countries_imports.csv'

def write_csv(df, path):
    df.to_csv(path, index=True, header=True)
    
def open_csv(path):
    return pd.read_csv(path)

def open_json(path):
    return json.load(open(path))

def get_date():
    return str(dt.now().strftime("%Y-%m-%d-%H-%M-%S"))

def save_map(m):
    m.save(f"{output_path}\map.html")

def save_plot(plt, direction):
    return "Image saved" if plt.savefig(f'{output_path}\\{direction}_barplot-{get_date()}.png', dpi=1200, bbox_inches='tight') else None
In [2]:
# Reading
path = 'raw_data/Perfil_EmpresasImpo_2022_WEB.xlsx'
df = pd.read_excel(path, engine='openpyxl')
In [3]:
# Cleaning 
df = pd.DataFrame(df['País_origen'].value_counts())
df.dropna()

# Modificaré las columnas con rename(), así que no coloco los países como índices.
df = df.reset_index()

# Filtering
mask = (df['País_origen'] == 'Sin información') | (df['País_origen'] == 'República Dominicana')
df = df[~mask]
df.rename(columns={"País_origen":"Country", "count":"Total_imports"}, inplace=True)
df
Out[3]:
Country Total_imports
0 China 203794
1 Estados Unidos de América 154279
2 España 25455
3 Italia 17690
4 México 17593
... ... ...
186 San Cristóbal y Nieves 1
187 Brunéi 1
188 Libia 1
189 Burkina Faso 1
190 Islas Åland 1

190 rows × 2 columns

In [ ]:
# Pruebas con los nombres de algunos países
# dictionary={'ä':'a','ö':'o','Ä':'A','å':'a', 'Å': 'a', 'ü': 'u'}
# df['Country'] = df['Country'].replace(dictionary, regex=True, inplace=True)
In [4]:
# Country column translation in order to match geojson countries for displaying the data on the map.
df['Country'] = df['Country'].apply(lambda x: str(GoogleTranslator(source='es', target='en').translate(x)))
df['Country'] = df['Country'].replace(['The Savior', 'Türkiye'], ['El Salvador', 'Turkey'])
df['Country'] = df['Country'].apply(lambda x: str(x.split('(')[0].strip()))

df
Out[4]:
Country Total_imports
0 China 203794
1 United States of America 154279
2 Spain 25455
3 Italy 17690
4 Mexico 17593
... ... ...
186 Saint Kitts and Nevis 1
187 Brunei 1
188 Libya 1
189 Burkina Faso 1
190 Aland Islands 1

190 rows × 2 columns

In [5]:
# Setting Country as the DataFrame index.
df.set_index('Country', inplace=True)
df
Out[5]:
Total_imports
Country
China 203794
United States of America 154279
Spain 25455
Italy 17690
Mexico 17593
... ...
Saint Kitts and Nevis 1
Brunei 1
Libya 1
Burkina Faso 1
Aland Islands 1

190 rows × 1 columns

In [6]:
# Saving processed data
write_csv(df, save_path)
In [7]:
# Ahora trabajaré con la data procesada.
df = open_csv(save_path)
df.set_index('Country', inplace=True)

# Organizamos la data de manera descendente.
df = df.sort_values(by='Total_imports', ascending=False)
df
Out[7]:
Total_imports
Country
China 203794
United States of America 154279
Spain 25455
Italy 17690
Mexico 17593
... ...
Tuvalu 1
Mali 1
South Georgia and the South Sandwich Islands 1
Montserrat 1
Aland Islands 1

190 rows × 1 columns

In [8]:
# Horizontal Bars Plot
fig, ax = plt.subplots()
bp = df[:10] # Top 10

# Save the chart so we can loop through the bars below.
bars = ax.bar(
    x=np.arange(bp.size),
    height=bp['Total_imports'],
    tick_label=bp.index
)

# Axis formatting.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_color('#DDDDDD')
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
ax.yaxis.grid(True, color='#EEEEEE')
ax.xaxis.grid(False)

# Add text annotations to the top of the bars.
bar_color = bars[0].get_facecolor()
for bar in bars:
  ax.text(
      bar.get_x() + bar.get_width() / 2,
      bar.get_height() + 0.3,
      round(bar.get_height(), 1),
      horizontalalignment='center',
      verticalalignment='bottom',
      color=bar_color,
      weight='bold'
  )

# Add labels and a title.
ax.set_xlabel('País de origen (p)', labelpad=15, color='#333333')
ax.set_ylabel('Cantidad de importaciones (n)', labelpad=15, color='#333333')
ax.set_title('Importaciones de mercancía [2022]', pad=15, color='#333333',
             weight='bold')

ax.margins(0.01, None)
fig.autofmt_xdate()
fig.tight_layout()
save_plot(fig, 'vertical')
In [9]:
# Ordenamos de manera ascendente para graficar de abajo hacia arriba.
bp = bp.sort_values(by='Total_imports', ascending=True)

# Spliting data for x and y.
names = bp.index
values = bp['Total_imports']
lim = bp['Total_imports'][-1] * 1.1

# Setting font
plt.rcParams['font.family'] = 'DejaVu Sans'
plt.rcParams['font.sans-serif'] = 'DejaVu Sans'

# Set the style of the axes and the text color
plt.rcParams['axes.edgecolor']='#333F4B'
plt.rcParams['axes.linewidth']=0.8
plt.rcParams['xtick.color']='#333F4B'
plt.rcParams['ytick.color']='#333F4B'
plt.rcParams['text.color']='#333F4B'

# Numeric placeholder for the y axis
my_range=list(range(1, len(bp.index)+1))

fig, ax = plt.subplots(figsize=(5,3.5))

# Create for each expense type an horizontal line that starts at x = 0 with the length 
# represented by the specific expense percentage value.
plt.hlines(y=my_range, xmin=0, xmax=bp['Total_imports'], color='#007ACC', alpha=0.2, linewidth=5)

# create for each expense type a dot at the level of the expense percentage value
plt.plot(bp['Total_imports'], my_range, "o", markersize=5, color='#007ACC', alpha=0.6)

# set labels
ax.set_xlabel('Importaciones', fontsize=15, fontweight='black', color = '#333F4B')
ax.set_ylabel('')

# set axis
ax.tick_params(axis='both', which='major', labelsize=12)
plt.yticks(my_range, bp.index)

# add an horizonal label for the y axis 
fig.text(-0.23, 0.96, 'País de Origen', fontsize=15, fontweight='black', color = '#333F4B')

# change the style of the axis spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

ax.spines['left'].set_bounds((1, len(my_range)))
ax.set_xlim(0, lim)

ax.spines['left'].set_position(('outward', 8))
ax.spines['bottom'].set_position(('outward', 5))

save_plot(plt, 'horizontal')
In [10]:
# Displaying the data on the map.
center = [35.762887,84.083132]
mapa = f.Map(location=center, zoom_start=2,
            min_zoom=1, max_bounds=True,
            min_lat=-84, min_lon=-175,
            max_lat=84,  max_lon=187,
            control_scale=True,
            tiles="cartodb positron"
    )

file_json = geojson_path
data_geojson = open_json(file_json)

f.Choropleth(geo_data=data_geojson,
                name = 'Importaciones',
                data=df,
                columns=(df.index, 'Total_imports'),
                key_on="properties.name",
                fill_color="Pastel2",
                fill_opacity=.7,
                line_opacity=.1,
                nan_fill_color='gray',
                line_color = "#0000",
                show=True,
                overlay=True,
                nan_fill_opacity=0.1,
                legend_name='Países que exportaron mercancía hacia República Dominicana (2022)',
                highlight= True,
                reset=True
).add_to(mapa)

f.LayerControl().add_to(mapa)
mapa
Out[10]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [11]:
save_map(mapa)
In [12]:
print("Total de paises que importaron a República Dominicana en 2022:", len(df.index)) 
Total de paises que importaron a República Dominicana en 2022: 190
In [ ]: